## This script contains replication code for Figure 4 and Figures I1-I9

set.seed(30324)
rm(list=ls())
library(stm)
library(tidyverse)
library(dotwhisker)
setwd("C:/Users/m1049/Dropbox/Wang_PB Replication/data")

## preprocessing

df <- read.csv("open-ended.csv")

income <- read.csv("income.csv")

df<-left_join(df, income, by=c("ipaddress","income_bin"))

names(df)

df<-df[,c(4,5,6,7,8)]

names(df)

df<-df %>% rename(open_1=open_1.1)
df<-df %>% filter(!is.na(open_1))

df$open_1 <- gsub("[\u2018\u2019\u201A\u201B\u2032\u2035]", "'",df$open_1)

df$treatment<-factor(df$treatment, levels=c("Control", "Salience", "Information Provision", "Equal Treatment Norm"))

class(df$treatment); df$treatment

processed <- textProcessor(df$open_1, metadata = df)
out <- prepDocuments(processed$documents, processed$vocab, processed$meta)
docs <- out$documents
vocab <- out$vocab
meta <-out$meta

##

m2 <- stm(out$documents, out$vocab, K = 3,
          prevalence =~ treatment,
          max.em.its = 75, data = out$meta, init.type = "Spectral")


# Figures I4-I6  wordcloud plot

pdf("fig_cloud_topic1.pdf", height = 2.5, width = 3)
cloud(m2, topic = 1, scale = c(4,0.25))
dev.off()

pdf("fig_cloud_topic2.pdf", height = 2.5, width = 3)
cloud(m2, topic = 2, scale = c(4,0.25))
dev.off()

pdf("fig_cloud_topic3.pdf", height = 2.5, width = 3)
cloud(m2, topic = 3, scale = c(4,0.4))
dev.off()


# Figures I1-I3 labeling topic

labelTopics(m2)

plot(m2, type = "summary")

thoughts1 <- findThoughts(m2, texts=meta$open_1, topics=1, n=5)$docs[[1]]; thoughts1 # policy preferences
thoughts2 <- findThoughts(m2, texts=meta$open_1, topics=2, n=5)$docs[[1]]; thoughts2 # general opinion on tax already high
thoughts3 <- findThoughts(m2, texts=meta$open_1, topics=3, n=5)$docs[[1]]; thoughts3 # poor burden

pdf("fig_represponse_topic1.pdf", height = 6, width = 12)
par(family = "Times", ps = 14)
plotQuote(thoughts1, width = 100, main = "")
dev.off()

pdf("fig_represponse_topic2.pdf", height = 6, width = 12)
par(family = "Times", ps = 14)
plotQuote(thoughts2, width = 100, main = "")
dev.off()

pdf("fig_represponse_topic3.pdf", height = 6, width = 12)
par(family = "Times", ps = 14)
plotQuote(thoughts3, width = 100, main = "")
dev.off()

# Effect estimate


out$meta$treatment <- as.factor(out$meta$treatment)
prep <- estimateEffect(1:3 ~ treatment, m2,
                       meta = out$meta, uncertainty = "Global")
model_summary<-summary(prep, topics=1:3);model_summary

# information Provision Treatment: coefplot


term_translated = c("dont, need, increas, alreadi, fair,enough",
                    "incom, lower, poor",
                    "school, educ, health")

estimate = rep(NA, 3)
std.error = rep(NA, 3)
for(i in 1:length(model_summary$tables)){
  estimate[i] = model_summary$tables[[i]][3,1]
  std.error[i] = model_summary$tables[[i]][3,2]
}

stm_results = data.frame("term" = term_translated, "estimate" = estimate, "std.error"= std.error)

stm_results = stm_results[order(stm_results$estimate, decreasing = T),]

# Figure 4

dwplot(stm_results, 
       vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2), dot_args = list(size = 2.5)) +
  theme_minimal() + xlab("\nCoefficient Estimate of the Information Provision Treatment") +
  theme(text = element_text(size=12),axis.text.y = element_text(size = 12), axis.text.x = element_text(size=12, angle = 0, hjust = 1), legend.position = "none") +
  scale_colour_grey(start = 0, end = 0)

ggsave(file="information_stm.pdf", width=8, height=6)


# Figure I8 salience: coefplot

estimate = rep(NA, 3)
std.error = rep(NA, 3)
for(i in 1:length(model_summary$tables)){
  estimate[i] = model_summary$tables[[i]][2,1]
  std.error[i] = model_summary$tables[[i]][2,2]
}

stm_results = data.frame("term" = term_translated, "estimate" = estimate, "std.error"= std.error)

stm_results = stm_results[order(stm_results$estimate, decreasing = T),]

dwplot(stm_results, 
       vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2), dot_args = list(size = 2.5)) +
  theme_minimal() + xlab("\nCoefficient Estimate of the Salience Condition") +
  theme(text = element_text(size=12),axis.text.y = element_text(size = 12), axis.text.x = element_text(size=12, angle = 0, hjust = 1), legend.position = "none") +
  scale_colour_grey(start = 0, end = 0)

ggsave(file="salience_stm.pdf", width=8, height=6)

# Figure I9 Equal Treatment Norm: coefplot

estimate = rep(NA, 3)
std.error = rep(NA, 3)
for(i in 1:length(model_summary$tables)){
  estimate[i] = model_summary$tables[[i]][4,1]
  std.error[i] = model_summary$tables[[i]][4,2]
}

stm_results = data.frame("term" = term_translated, "estimate" = estimate, "std.error"= std.error)

stm_results = stm_results[order(stm_results$estimate, decreasing = T),]

dwplot(stm_results, 
       vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2), dot_args = list(size = 2.5)) +
  theme_minimal() + xlab("\nCoefficient Estimate of the Equal Treatment Norm") +
  theme(text = element_text(size=12),axis.text.y = element_text(size = 12), axis.text.x = element_text(size=12, angle = 0, hjust = 1), legend.position = "none") +
  scale_colour_grey(start = 0, end = 0)

ggsave(file="equal_stm.pdf", width=8, height=6)

## Figure I7 information provision treatment interacts with income

m3 <- stm(out$documents, out$vocab, K = 3,
          prevalence =~ treatment*income,
          max.em.its = 75, data = out$meta, init.type = "Spectral")


labelTopics(m3)

plot(m3, type = "summary")

thoughts1 <- findThoughts(m3, texts=meta$open_1, topics=1, n=30); thoughts1 # policy preferences
thoughts2 <- findThoughts(m3, texts=meta$open_1, topics=2, n=20); thoughts2 # general opinion on tax already high
thoughts3 <- findThoughts(m3, texts=meta$open_1, topics=3, n=20); thoughts3 # poor burden


out$meta$treatment <- as.factor(out$meta$treatment)
prep <- estimateEffect(c(2) ~ treatment*income, m3,
                       meta = out$meta, uncertainty = "Global")
model_summary<-summary(prep, topics=2);model_summary

pdf(file = "stm_info_income_interaction.pdf", width=8, height=6)

par(family = "Times", ps = 14)

plot.estimateEffect(prep,
                    covariate = "income",
                    model = m3,
                    method = "continuous",
                    moderator="treatment",
                    moderator.value = c("Information Provision"),
                    xlab = "Household Income (15 = Highest)",
                    ylim = c(0, 0.5),
                    linecol = "blue", 
                    main="Topic 2: Hurting the Poor",
                    printlegend = F)


plot.estimateEffect(prep,
                    covariate = "income",
                    model = m3,
                    method = "continuous",
                    moderator="treatment",
                    moderator.value = c("Control"),
                    xlab = "Household Income (15 = Highest)",
                    ylim = c(0, 0.5),
                    linecol = "red", 
                    main="Topic 2: Hurting the Poor",
                    add = T,
                    printlegend = F)


legend(1, 0.5, c("Information Provision", "Control"),
       lwd = 2, col = c("blue", "red"))
dev.off()
